# Daily composition of devices in T/C groups, broken down by each device's cumulative ad exposure to date

library(tidyverse)
library(magrittr)
library(data.table)
library(lubridate)
library(hms)
library(ggthemes)
theme_set(theme_few())

### SET WORKING DIRECTORY HERE ###
# Root of the replication archive. It may be given relative to the directory
# the script is launched from; it is resolved to an absolute path right away
# because setwd() below changes the working directory, and a relative
# data_dir / plot_dir / tables_dir would otherwise be re-resolved against the
# new working directory (e.g. "replication/data/replication/plots/").
path_to_archive <- "replication/"
path_to_archive <- sub(
  "/*$", "/",  # guarantee exactly one trailing slash for the paste0() calls
  normalizePath(path_to_archive, winslash = "/", mustWork = TRUE)
)
# Derived directories; each one ends in "/" so file names can be appended
# directly with paste0().
data_dir <- paste0(path_to_archive, "data/")
setwd(data_dir)
plot_dir <- paste0(path_to_archive, "plots/")
tables_dir <- paste0(path_to_archive, "tables/")

# One Date per calendar day from 1 Sep 2012 through 6 Nov 2012, inclusive.
dates <- seq(
  from = mdy("09/01/2012"),
  to = mdy("11/06/2012"),
  by = "days"
)

# Per-device T/C row counts for a single day.
#
# Reads "<data_dir>dd/tc_groups_<d>.rds", keeps only the entries whose nested
# `t_c` table has at least one "T" row and at least one "C1", "C2" or "C1+C2"
# row, flattens the nested tables, relabels every non-"T" row as "C", and
# counts "T" and "C" rows per device.
#
# d: the day to process; as.character(d) is used both for the progress line
#    and for the file name.
# Returns (invisibly, because the last step is a `:=` assignment) a data.table
# with columns device_id, t_count, c_count and date.
# NOTE(review): assumes each element of `t_c` is a data.table with at least
# `group` and `device_id` columns -- confirm against the code that writes the
# .rds files.
g_count <- function(d) {
  day_label <- as.character(d)
  cat(day_label, "\n")  # progress indicator

  # Number of rows carrying `target_group` in each nested T/C table.
  rows_in_group <- function(tables, target_group) {
    map_dbl(tables, function(tc) tc[group == target_group, .N])
  }

  tc_groups <- readRDS(paste0(data_dir, "dd/tc_groups_", day_label, ".rds"))
  tc_groups <- mutate(
    tc_groups,
    n_t = rows_in_group(t_c, "T"),
    n_c1 = rows_in_group(t_c, "C1"),
    n_c2 = rows_in_group(t_c, "C2"),
    n_c12 = rows_in_group(t_c, "C1+C2")
  )
  # Keep only entries that have both a "T" side and some "C" side.
  tc_groups <- filter(tc_groups, n_t > 0, n_c1 + n_c2 + n_c12 > 0)

  device_rows <- as.data.table(unnest(tc_groups, cols = c(t_c)))
  # Collapse the three control labels into a single "C" label.
  device_rows[group != "T", group := "C"]

  per_device <- device_rows[
    ,
    .(t_count = sum(group == "T"), c_count = sum(group == "C")),
    by = .(device_id)
  ]
  per_device[, date := d]
}

# Daily distribution of devices over cumulative-exposure buckets.
#
# Steps:
#   1. Run g_count() for every day in `dates` and stack the results.
#   2. Within each device, in date order, take the running total of t_count
#      (the total includes the current day's count).
#   3. Bucket the running total into "0", "1", "2", "3", "4" and "5+".
#   4. Count devices per (date, bucket) and convert to a within-date share.
#
# The bucketing caps the running total at 5 explicitly. The earlier
# fct_lump_n(n = 5) kept the five *most frequent* values, which only coincides
# with a ">= 5" threshold when 0-4 happen to be the five most common totals
# (and ties could keep more than five levels), so the "5+" label was not
# guaranteed to mean what it says. Fixed levels also keep the stacking and
# legend order stable regardless of the data.
#
# Result: data.table with columns date, cumulative_exposure (factor), count
# and proportion. Devices only contribute on days for which g_count() returns
# a row for them.
daily_counts <- map(dates, g_count) %>%
  rbindlist %>%
  .[order(device_id, date)] %>%
  .[, .(date, cumulative_exposure = cumsum(t_count)), by = .(device_id)] %>%
  .[, cumulative_exposure := factor(
    pmin(cumulative_exposure, 5),
    levels = 0:5,
    labels = c(as.character(0:4), "5+")
  )] %>%
  .[, .(count = .N), by = .(date, cumulative_exposure)] %>%
  .[, proportion := count / sum(count), by = .(date)]


# Stacked area chart of the daily share of devices in each cumulative-exposure
# bucket, with the legend placed below the panel.
ggplot(
  daily_counts,
  aes(
    x = date,
    y = proportion,
    group = cumulative_exposure,
    fill = cumulative_exposure
  )
) +
  geom_area() +
  scale_fill_discrete(name = "Cumulative Ad Exposures") +
  labs(x = "Date", y = "Proportion of Devices among all T/C Groups each Day") +
  theme(legend.position = "bottom")

# No `plot` argument is given, so ggsave() writes the most recent plot.
ggsave(filename = paste0(plot_dir, "cumulative_exposures_over_time.png"))
